function counts = letterStatistics(filename, allowedChar, N)
%LETTERSTATISTICS Count N-gram occurrences of allowed characters in a file.
%
% The file is read into a single character vector and lower-cased. Newlines
% and hyphens are replaced by spaces, every character that is not listed in
% allowedChar is removed, and runs of spaces are collapsed to one space.
% Each N-character combination of the allowed symbols is then counted in
% the cleaned text (overlapping occurrences are counted).
%
% INPUTS:
%   filename    - a string containing the input filename (with extension).
%   allowedChar - an array of pertinent symbols. Each element is treated
%                 as a literal character (not as a regular expression).
%                 Note that '-' is converted to a space before filtering,
%                 so a '-' in allowedChar never matches text from the file.
%   N           - the length of the letter combinations whose frequencies
%                 are to be computed. Defaults to 1 when omitted, empty,
%                 or smaller than 1.
%
% OUTPUT:
%   counts - a 1-by-M row vector, where M is the number of rows returned
%            by PermsRep(allowedChar, N); counts(k) is the number of
%            occurrences of the k-th row in the parsed string.
%
% ERRORS:
%   letterStatistics:fileOpenFailed - the file could not be opened.

if nargin < 3 || isempty(N) || N < 1
    N = 1;
end

%Input string
[fid, errMsg] = fopen(filename, 'rt');
if fid == -1
    error('letterStatistics:fileOpenFailed', ...
          'Could not open file "%s": %s', filename, errMsg);
end
%Guarantee the handle is released even if reading fails.
closeFile = onCleanup(@() fclose(fid));
str = fscanf(fid, '%c');
%The file is no longer needed; destroying the guard closes it right away.
clear closeFile
str = lower(str);

%Parse string
newlineChar = sprintf('\n');
str(str == newlineChar) = ' ';
str(str == '-') = ' ';
%Keep only the allowed symbols. Literal membership is used instead of a
%regular-expression character class so that symbols such as ']', '\', '^'
%or '-' in allowedChar cannot corrupt the pattern.
str = str(ismember(str, allowedChar));
str = regexprep(str, ' {2,}', ' ');

%All possible N-letter combinations (with spaces)
%NOTE(review): PermsRep is defined outside this file; it is assumed to
%return one combination per row - confirm against its implementation.
allCombinations = PermsRep(allowedChar, N);

%Count of all N-letter combinations in our text
%Size by the number of rows: length() would return the number of columns
%whenever there are fewer combinations than characters per combination.
numCombinations = size(allCombinations, 1);
counts = zeros(1, numCombinations);
for ii = 1:numCombinations
    idx = strfind(str, allCombinations(ii,:));
    counts(ii) = length(idx);
end

end

